/*******************************************************************************
Last update: June 29, 2023 
********************************************************************************/

// Session setup: clear the Results window and everything in memory, then move
// to the project root so that all later paths can be written relative to it.
// Forward slashes are used as the directory separator because Stata accepts
// them on every operating system, whereas backslashes only resolve on Windows.
cls
clear all
local dir "~/Dropbox/Working Papers/Distinction Effect"
cd "`dir'"

// Run the cleaning-programs do-file quietly (path is relative to the project root).
qui do "Does/Data Processing/0. Cleaning Programs.do"


/*******************************************************************************/
//1. Saber Pro Data
// Builds an extract of the Saber Pro records with at most one row per person
// (doc_tipo, doc_num), taken from the person's earliest exam_time, and stores
// it in the SaberPro tempfile for the merge with SPADIES further down.
use "Data/Intermediates/Saber Pro 2006-2015.dta", clear

//Prepare Variables
rename cod_universidad spro_cllgcode
rename programa_snies spro_sniescode
rename programa_codicfes spro_programcode
rename estu_estrato spro_stratum
rename eval_tipodocumento doc_tipo
rename eval_documento doc_num
rename periodo exam_time
rename fami_cod_educa_madre spro_educmother

// exam_year is the first four characters of exam_time, converted back to a number.
tostring exam_time, gen(exam_year)
replace exam_year = substr(exam_year, 1, 4)
destring exam_year, replace

// Birth date is parsed as day-month-year into a Stata daily date.
gen spro_birthdate = date(estu_fechanacimiento, "DMY")
format spro_birthdate %td

//Keep relevant variables
keep consecutivo doc* exam_time exam_year spro_cllgcode spro_programcode spro_sniescode spro_birthdate spro_stratum spro_educmother

// Keep only the record(s) from each person's earliest exam_time. The person key
// is spelled out (doc_tipo doc_num) so that this step groups on exactly the same
// key as the uniqueness filter below, instead of on whatever doc* happens to match.
// A tempvar is used so that no helper column can be left behind in the data.
tempvar first_exam
bysort doc_tipo doc_num: egen `first_exam' = min(exam_time)
keep if exam_time == `first_exam'
drop `first_exam'

// Persons that still have more than one record are ambiguous: drop all of their
// rows. Filtering on _N does this without creating a tag variable, so no leftover
// "dup" column is saved to the tempfile and carried into the final dataset.
bysort doc_tipo doc_num: keep if _N == 1

tempfile SaberPro
save `SaberPro', replace


/*******************************************************************************/
//2. SPADIES Data
// List of variables to read from the SPADIES file, held in the global X.
global X doc_* ies sexo nac_* e_* *_per estrato puntaje_icfes periodo_icfes programa_id aread prog_nivel prog_nucleo prim_sem periodo edu_madre_icfes materias_*
use $X using "Data\Originals\SPADIES\Spadies 2016.dta", clear

// d_2006 ... d_2009: equal to 1 on every row of a student-program pair that has
// at least one row whose periodo is that year followed by 1 or 2; missing otherwise.
forvalues yr = 2006/2009 {
	bysort doc_* programa_id: egen d_`yr' = max(cond(inlist(periodo, `yr'1, `yr'2), 1, .))
}

// Replace each course count with its total over all rows of the student-program
// pair. The original column is dropped and the total takes over its name.
foreach kind in tomadas aprobadas {
	bysort doc_* programa_id: egen aux_total = total(materias_`kind')
	drop materias_`kind'
	rename aux_total materias_`kind'
}

// Reduce to the row whose periodo equals prim_sem; periodo is not needed afterwards.
keep if prim_sem == periodo
drop periodo

// Drop if wrong id: the number must be non-missing and above 3,000,000, the
// document type must be "C" or "T", and the number may not exceed
// 10,000,000,000 for type "C" or 100,000,000,000 for type "T".
drop if missing(doc_num) | doc_num <= 3000000
keep if (doc_tipo == "C" & doc_num <= 10000000000) | (doc_tipo == "T" & doc_num <= 100000000000)

// Store the id as a string, written out in full without decimals.
tostring doc_num, replace format(%12.0f)

//Homogenize Program Codes: old codes to SNIES codes
// Build a lookup table of "PREGRADO" programs from the 2015 SNIES file and park
// it in the Programs tempfile. The data in memory is preserved and restored
// around this step, so the SPADIES records are untouched.
tempfile Programs
preserve
	use "Data/Originals/SNIES/Programas_2015.dta", clear

	// Give the SNIES columns program_* names in one pass.
	rename (cdigoanterioricfes cdigosniesdelprograma nmeroperiodosdeduracin periodosdeduracin metodologa niveldeformacin nivelacadmico ncleobsicodelconocimientonbc) ///
	       (program_oldcode program_code program_length program_periodicity program_methodology program_level program_academic_level program_field)

	// Only rows with academic level "PREGRADO" are kept; the level column is
	// constant after that filter and is removed together with every column
	// that does not start with program_.
	keep if program_academic_level == "PREGRADO"
	keep program_*
	drop program_academic_level

	// Normalise whitespace in the old code and prefix it with an apostrophe
	// (NOTE(review): presumably to match how programa_id is stored in SPADIES - confirm).
	replace program_oldcode = "'" + trim(itrim(program_oldcode))

	save `Programs', replace
restore

// Step 1: look up the SNIES code through the old program code in the Programs
// table. Unmatched enrollment rows stay; rows found only in the lookup are discarded.
rename programa_id program_oldcode
merge m:1 program_oldcode using `Programs', keepusing(program_oldcode program_code) keep(master match) nogenerate

// Step 2: bring in the two manually built crosswalks (old codes to SNIES codes),
// again keeping every enrollment row and nothing that exists only in a crosswalk.
merge m:1 program_oldcode using "Data/Originals/SNIES/ICFEScodes_SNIEScodes.dta", keep(master match) nogenerate
merge m:1 program_oldcode using "Data/Originals/SNIES/ICFEScodes_SNIEScodes_OLE.dta", keep(master match) nogenerate

// Step 3: fill a still-empty program_code from the crosswalk columns, in order
// of priority, and finally from the old code itself with its apostrophes removed.
foreach fallback in cod_snies idprograma {
	replace program_code = `fallback' if program_code == "" & `fallback' != ""
}
replace program_code = subinstr(program_oldcode, "'", "", .) if program_code == "" & program_oldcode != ""

// Rows without any code cannot be linked to a program and are removed; the
// crosswalk columns are no longer needed and the id gets its original name back.
drop if program_code == ""
drop cod_snies idprograma
rename program_oldcode programa_id

// Step 4: attach the program attributes by SNIES code, keeping matched rows only,
// and restrict the data to programs whose level is "UNIVERSITARIA".
merge m:1 program_code using `Programs', keep(match) nogenerate
keep if program_level == "UNIVERSITARIA"


/*******************************************************************************/
//3. Merge SPADIES and Saber Pro
// Attach the Saber Pro extract to the enrollment records. The key is written out
// as doc_tipo doc_num (the key on which the Saber Pro extract is made unique)
// rather than the wildcard doc*, which is expanded against whatever variables
// are in memory and would silently change the key, or make the merge fail, if
// any other variable starting with "doc" were present.
// Every enrollment row is kept; Saber Pro rows without an enrollment record are not.
merge m:1 doc_tipo doc_num using `SaberPro', keep(master match)

// spa_spro = 1 when the enrollment row found a Saber Pro record, 0 otherwise.
gen spa_spro = _merge==3
drop _merge

// Forward slashes keep the output path valid on every operating system.
save "Data/Finals/EnrollmentRecords_CollegeExam.dta", replace
